# Launch sglang.launch_server for the Qwen/Qwen3-32B model with
# `--device npu`, the `ascend` attention backend and `--tp-size 4`.
# The exported variables below are inherited by the Python process.
# NOTE(review): the per-variable descriptions are assumptions based on the
# variable names; confirm against the SGLang / torch_npu / HCCL documentation.

# Host-side setting (presumably asks SGLang to pin worker CPU affinity).
export SGLANG_SET_CPU_AFFINITY=1

# Device allocator / stream settings (presumably consumed by torch_npu).
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True STREAMS_PER_DEVICE=32

# HCCL collective-communication settings (buffer size unit not shown here;
# TODO confirm it is MB).
export HCCL_BUFFSIZE=1536 HCCL_OP_EXPANSION_MODE=AIV

# Arguments are passed in the same order as before; only the line layout
# differs.
python -m sglang.launch_server \
  --device npu --attention-backend ascend \
  --trust-remote-code \
  --tp-size 4 --model-path Qwen/Qwen3-32B \
  --mem-fraction-static 0.8